package cn.mzcode.recheck.service.impl;

import cn.mzcode.recheck.RecheckApplication;
import cn.mzcode.recheck.entity.RecheckFile;
import cn.mzcode.recheck.entity.RecheckSim;
import cn.mzcode.recheck.resp.ServerResponse;
import cn.mzcode.recheck.service.RecheckService;
import cn.mzcode.recheck.util.FileUtil;
import cn.mzcode.recheck.util.ZipAndRarUtil;
import com.alibaba.fastjson.JSON;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.springframework.boot.system.ApplicationHome;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import org.springframework.web.multipart.MultipartFile;

import java.io.*;
import java.util.*;
import java.util.stream.Collectors;

import static java.util.Map.Entry.comparingByValue;

/**
 * @author: 马壮
 * @create: 2020-04-28 22:14
 * @description: 查重服务实现
 */
@Slf4j
@Service
public class RecheckServiceImpl implements RecheckService {

    private static String ROOT_PATH;
    private static String SRC_PATH;
    private static String UNZIP_PATH;
    private static String RESULT_PATH;

    static {
        ROOT_PATH = new ApplicationHome(RecheckApplication.class).getSource().getParentFile().getPath() + "/upload/";
        SRC_PATH = ROOT_PATH + "src/";
        UNZIP_PATH = ROOT_PATH + "unzip/";
        RESULT_PATH = ROOT_PATH + "result/";
    }

    @Override
    public String saveSrcFile(MultipartFile file) throws IOException {
        String uuid = UUID.randomUUID().toString().replaceAll("-", "");
        String fileName = file.getOriginalFilename();
        String suffix = fileName.substring(fileName.lastIndexOf("."));
        File srcFile = new File(SRC_PATH + uuid + suffix);
        file.transferTo(srcFile);
        return uuid;
    }

    @Override
    public boolean unzipByRecheckNo(String recheckNo) {
        String fileName = this.findFullSrcFileNameByReckeckNo(recheckNo);
        if (StringUtils.isEmpty(fileName)) {
            return false;
        }
        if (fileName.endsWith(".zip")) {
            return ZipAndRarUtil.unzip(fileName, UNZIP_PATH + recheckNo + "/");
        } else if (fileName.endsWith(".rar")) {
            return ZipAndRarUtil.unrar(fileName, UNZIP_PATH + recheckNo + "/");
        } else {
            log.error("[RecheckServiceImpl][unzipByRecheckNo]文件格式不正确，查重编码={}", recheckNo);
            return false;
        }
    }

    @Override
    public ServerResponse preVerifyStructure(String recheckNo) {
        String rootPath = UNZIP_PATH + recheckNo;
        File rootFile = new File(rootPath);
        if (!rootFile.exists() || !rootFile.isDirectory()) {
            return ServerResponse.createByErrorMessage("查重编号无对应文件");
        }
        // 没有规定的排除文本，为错误
        String exception = rootPath + "/exception.txt";
        if (!new File(exception).exists()) {
            return ServerResponse.createByErrorMessage("指定位置没有找到exception.txt");
        }
        // 根目录只能有一个文件夹和一个排除文本，文件夹数量不等于1,为错误
        List<File> collect = Arrays.asList(new File(rootPath).listFiles()).stream()
                .filter(File::isDirectory).collect(Collectors.toList());
        if (CollectionUtils.isEmpty(collect) || collect.size() > 1) {
            return ServerResponse.createByErrorMessage("根目录只能有一个文件夹和exception.txt");
        }
        File dir = collect.get(0);
        String[] tagNames = dir.list();
        if (tagNames == null || tagNames.length == 0) {
            return ServerResponse.createByErrorMessage("作业文件夹为空");
        }
        // 不受支持的文件
        List<String> any = Arrays.asList(tagNames).stream().filter(i -> {
            return !(i.endsWith(".doc") || i.endsWith(".docx") || i.endsWith(".pdf") || i.endsWith(".txt"))
                    || i.startsWith(("~$"));
        }).collect(Collectors.toList());
        // 存在不受支持的作业文件
        if (!CollectionUtils.isEmpty(any)) {
            return ServerResponse.createByErrorMessage("作业文件夹内存在不受支持的文件格式或下级文件夹,详情：" + JSON.toJSONString(any));
        }
        return ServerResponse.createBySuccess();
    }

    @Override
    public void cleanFileByRecheckNo(String recheckNo) {
        // 删除源文件
        String srcFileName = this.findFullSrcFileNameByReckeckNo(recheckNo);
        if (StringUtils.isNotEmpty(srcFileName)) {
            new File(srcFileName).delete();
        }
        // 删除解压文件
        FileUtil.deleteDir(UNZIP_PATH + recheckNo);
        // 删除查重结果
        FileUtil.deleteDir(RESULT_PATH + recheckNo);
    }

    @Async
    @Override
    public void startAsyncCalcSim(String recheckNo) {
        /**
         * 读取排除文本
         */
        String exceptionContent = FileUtil.read(UNZIP_PATH + recheckNo + "/exception.txt");
        Map<String, Integer> exception = this.collectWordFrequency(exceptionContent);
        /**
         * 读取所有作业，并去除排除文本
         */
        File dir = Arrays.asList(new File(UNZIP_PATH + recheckNo).listFiles()).stream()
                .filter(File::isDirectory).findFirst().get();
        String rootPath = dir.getAbsolutePath();
        List<RecheckFile> recheckFiles = Arrays.asList(dir.list()).stream()
                .map(item -> {
                    String content = FileUtil.read(rootPath + "/" + item);
                    return RecheckFile.builder()
                            .userName(item.substring(0, item.lastIndexOf(".")))
                            .wordFrequency(this.collectWordFrequency(content))
                            .build();
                }).map(item -> item.excludeWordFrequency(exception)).collect(Collectors.toList());
        /**
         * 两两匹配，计算向量
         */
        HashMap<String, HashMap<String, Double>> resultMap = new HashMap<>();
        List<RecheckSim> resultList = new ArrayList<>();
        for (int i = 0; i < recheckFiles.size() - 1; i++) {
            for (int j = i + 1; j < recheckFiles.size(); j++) {
                RecheckFile rfa = recheckFiles.get(i);
                RecheckFile rfb = recheckFiles.get(j);
                double sim = this.calcRecheckFileSim(rfa, rfb);
                resultList.add(RecheckSim.builder()
                        .user1(rfa.getUserName())
                        .user2(rfb.getUserName())
                        .sim(sim)
                        .build());
                if (!resultMap.containsKey(rfa.getUserName())) {
                    resultMap.put(rfa.getUserName(), new HashMap<>());
                }
                resultMap.get(rfa.getUserName()).put(rfb.getUserName(), sim);
                if (!resultMap.containsKey(rfb.getUserName())) {
                    resultMap.put(rfb.getUserName(), new HashMap<>());
                }
                resultMap.get(rfb.getUserName()).put(rfa.getUserName(), sim);
            }
        }
        // 保存查重结果对象到文件
        this.saveRecheckResult(resultList, recheckNo);
        // 转换查重结果为excel，并保存到本地
        this.saveExcel(resultMap, recheckNo);
    }

    @Override
    public void saveRecheckResult(List<RecheckSim> result, String recheckNo) {
        File root = new File(RESULT_PATH + recheckNo);
        if (!root.exists()) {
            root.mkdirs();
        }
        try (ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(RESULT_PATH + recheckNo + "/" + recheckNo))) {
            oos.writeObject(result);
        } catch (Exception e) {
            log.error("[RecheckServiceImpl][saveRecheckResult]保存查重结果到本地失败，查重编号={}，查重结果={}", recheckNo, JSON.toJSONString(result));
        }
    }

    /**
     * 加载查重结果
     *
     * @param recheckNo 查重编号
     * @return
     */
    @Override
    public List<RecheckSim> loadRecheckResult(String recheckNo) {
        try (ObjectInputStream ois = new ObjectInputStream(new FileInputStream(RESULT_PATH + recheckNo + "/" + recheckNo))) {
            return (List<RecheckSim>) ois.readObject();
        } catch (Exception e) {
            log.error("[RecheckServiceImpl][loadRecheckResult]加载查重结果失败，查重编号={}", recheckNo);
            return null;
        }
    }

    @Override
    public boolean recheckComplete(String recheckNo) {
        File file = new File(RESULT_PATH + recheckNo);
        return file.exists();
    }

    @Override
    public boolean recheckUploaded(String recheckNo) {
        String path = this.findFullSrcFileNameByReckeckNo(recheckNo);
        return StringUtils.isNotEmpty(path);
    }

    @Override
    public String getResultPath() {
        return RESULT_PATH;
    }

    /**
     * 转换查重结果为excel，存储到磁盘
     *
     * @param resultMap 查重结果Map
     * @param recheckNo 查重编号
     */
    private void saveExcel(HashMap<String, HashMap<String, Double>> resultMap, String recheckNo) {
        try (FileOutputStream out = new FileOutputStream(RESULT_PATH + recheckNo + "/" + recheckNo + ".xlsx");) {
            XSSFWorkbook wb = new XSSFWorkbook();
            Sheet sheet = wb.createSheet();
            /**
             * 排序，excel左上角优先相似度高的
             */
            List<Map.Entry<String, HashMap<String, Double>>> result = resultMap.entrySet().stream()
                    .peek(item -> {
                        HashMap<String, Double> sorted = item.getValue().entrySet().stream()
                                .sorted(Collections.reverseOrder(comparingByValue()))
                                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue,
                                        (v1, v2) -> v2, LinkedHashMap::new));
                        item.setValue(sorted);
                    })
                    .sorted(Comparator.comparing(i -> i, (item1, item2) -> {
                        Double v1 = item1.getValue().entrySet().iterator().next().getValue();
                        Double v2 = item2.getValue().entrySet().iterator().next().getValue();
                        return v1.compareTo(v2) * -1;
                    }))
                    .collect(Collectors.toList());
            for (int i = 0; i < result.size(); i++) {
                Row nRow = sheet.createRow(i);
                String userName = result.get(i).getKey();
                nRow.createCell(0).setCellValue(userName);
                HashMap<String, Double> child = result.get(i).getValue();
                int index = 1;
                for (Map.Entry<String, Double> entry : child.entrySet()) {
                    nRow.createCell(index++).setCellValue(entry.getKey());
                    nRow.createCell(index++).setCellValue(entry.getValue());
                }
            }
            wb.write(out);
        } catch (Exception e) {
            log.error("[RecheckServiceImpl][saveExcel]查重结果写入Excel失败，查重编号={}", recheckNo, e);
        }
    }

    /**
     * 根据查重编号查找源文件全路径
     *
     * @param recheckNo
     * @return
     */
    private String findFullSrcFileNameByReckeckNo(String recheckNo) {
        /**
         * 查重编码只是文件名，没有后缀，所以这里拿文件名去匹配
         */
        String[] fileNames = new File(SRC_PATH).list(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.startsWith(recheckNo);
            }
        });
        if (fileNames == null || fileNames.length != 1) {
            return null;
        }
        return SRC_PATH + fileNames[0];
    }

    /**
     * 统计文本词频，长度2，滑动切割文本
     *
     * @param content 文本内容
     * @return
     */
    private Map<String, Integer> collectWordFrequency(String content) {
        List<String> word = new ArrayList<>();
        for (int i = 0; i < content.length() - 1; i++) {
            word.add(content.substring(i, i + 2));
        }
        // 统计词频
        return word.stream().collect(Collectors.groupingBy(str -> str
                , Collectors.reducing(0, e -> 1, Integer::sum)));
    }

    /**
     * 计算两个作业的相似度
     *
     * @param f1 作业1
     * @param f2 作业2
     * @return 相似度
     */
    private double calcRecheckFileSim(RecheckFile f1, RecheckFile f2) {
        Map<String, Integer> wfa = f1.getWordFrequency();
        Map<String, Integer> wfb = f2.getWordFrequency();
        Set<String> keySet = new HashSet<String>() {{
            addAll(wfa.keySet());
            addAll(wfb.keySet());
        }};
        ArrayList<Float> va = new ArrayList<Float>();
        ArrayList<Float> vb = new ArrayList<Float>();

        for (String key : keySet) {
            va.add(wfa.getOrDefault(key, 0).floatValue());
            vb.add(wfb.getOrDefault(key, 0).floatValue());
        }
        return this.calcVector(va, vb);
    }

    /**
     * 计算向量相似度
     *
     * @param va 向量A
     * @param vb 向量B
     * @return 相似度
     */
    private float calcVector(ArrayList<Float> va, ArrayList<Float> vb) {
        if (va.size() != vb.size()) {
            return 0;
        }

        int size = va.size();
        float simVal = 0;

        // numerator分子
        float num = 0;
        // denominator分母
        float den = 1;

        double a = 0, b = 0;
        for (int i = 0; i < size; i++) {
            num += va.get(i) * vb.get(i);
        }
        for (int j = 0; j < size; j++) {
            a += Math.pow(va.get(j), 2);
            b += Math.pow(vb.get(j), 2);
        }
        double s = Math.sqrt(a) * Math.sqrt(b);
        den = (float) s;

        if (den == 0) {
            den = 1;
        }
        simVal = num / den;
        return simVal;
    }
}
